Loading the libraries needed to load the dataset and perform queries on it.
library(DataComputing)
library(tidyverse)
library(rvest)
library(lubridate)
library(dplyr)
library(readr)
library(ggplot2)
Load Data
Load the tables into RStudio so that they can be used for evaluation. Each file is selected interactively with `file.choose()` and read with `data.table::fread()`.
# Ask the user to pick the sample-submission file; keep the chosen path in
# `file_name` so it stays available in the session.
file_name <- base::file.choose()
# Read the selected file into a data.table.
sampleSubmission <- data.table::fread(input = file_name)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
# Bare expression: auto-prints the loaded table so it appears in the notebook output.
sampleSubmission
# Ask the user to pick the training-data file; the chosen path is kept in
# `file_name`.
file_name <- file.choose()
# Read the selected file into a data.table.
# integer64 = "double": this file contains 64-bit integer columns and the
# bit64 package is not installed, so with the default setting those columns
# print as strange-looking floating-point data. Reading them as doubles avoids
# the extra dependency (values beyond 2^53 would lose exactness).
TrainData <- data.table::fread(file_name, integer64 = "double")
Some columns are type 'integer64' but package bit64 is not installed. Those columns will print as strange looking floating point data. There is no need to reload the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data again.
# Bare expression: auto-prints the loaded table so it appears in the notebook output.
TrainData
# Ask the user to pick the test-data file; the chosen path is kept in
# `file_name`.
file_name <- file.choose()
# Read the selected file into a data.table.
# integer64 = "double": this file contains 64-bit integer columns and the
# bit64 package is not installed, so with the default setting those columns
# print as strange-looking floating-point data. Reading them as doubles avoids
# the extra dependency (values beyond 2^53 would lose exactness).
TestData <- data.table::fread(file_name, integer64 = "double")
Some columns are type 'integer64' but package bit64 is not installed. Those columns will print as strange looking floating point data. There is no need to reload the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data again.
# Bare expression: auto-prints the loaded table so it appears in the notebook output.
TestData
Data Wrangling
# Rename the financial-statement columns to names without spaces or special
# characters so later steps can refer to them without backticks.
#
# NOTE(review): the original chunk piped from `happinessTable`, which is never
# created in this notebook, so it failed with "object not found". `TrainData`
# is assumed to be the intended input -- confirm it carries these columns
# (and apply the same renames to `TestData` if it shares the schema).
#
# The new names are kept exactly as originally spelled (including
# `NetIncomeWOInterst`, `PerferredDividends`, `ShortTermInvestements`) so any
# code written against them keeps working.
#
# All pairs go into a single rename() call; the trailing empty rename() of the
# original was a no-op and has been dropped. The result is stored so the
# renamed columns are actually available afterwards.
TrainDataRenamed <- TrainData %>%
  rename(
    RevenueGrowth = `Revenue Growth`,
    CostRevenue = `Cost of Revenue`,
    GrossProfit = `Gross Profit`,
    RDExpenses = `R&D Expenses`,
    SGAExpense = `SG&A Expense`,
    OpExpenses = `Operating Expenses`,
    OpIncome = `Operating Income`,
    InterestExpense = `Interest Expense`,
    IncomeTaxExpense = `Income Tax Expense`,
    NetIncomeWOInterst = `Net Income - Non-Controlling int`,
    NetIncomeDisOps = `Net Income - Discontinued ops`,
    NetIncome = `Net Income`,
    PerferredDividends = `Preferred Dividends`,
    NetIncomeCOM = `Net Income Com`,
    EPSDiluted = `EPS Diluted`,
    WeightedAvSHS = `Weighted Average Shs Out`,
    WeightedAvSHSDil = `Weighted Average Shs Out (Dil)`,
    DividendPerShare = `Dividend per Share`,
    GrossMargin = `Gross Margin`,
    EBITDAMargin = `EBITDA Margin`,
    EBITMargin = `EBIT Margin`,
    ProfitMargin = `Profit Margin`,
    FreeCashFlowMargin = `Free Cash Flow margin`,
    ConsolidatedIncome = `Consolidated Income`,
    EarningsBefTaxMargin = `Earnings Before Tax Margin`,
    NetProfitMargin = `Net Profit Margin`,
    CashCashEquivalents = `Cash and cash equivalents`,
    ShortTermInvestements = `Short-term investments`
  )

# Auto-print the renamed table, as the original pipeline did, so the result
# appears in the notebook output.
TrainDataRenamed
LS0tCnRpdGxlOiAiUHJvamVjdCBQaGFzZSAyIiAKc3VidGl0bGU6ICJEUzMxMCIKYXV0aG9yOiAiQXJ3YSBIYXJhcndhbGEsIEtyaXRoaWthIFNlbnRoaWwsICYgU3dhcmFsaSBLb3JnYW9ua2FyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpJbnN0YWxsaW5nIHRoZSBsaWJyYXJpZXMgdGhhdCBhcmUgbmVlZGVkIHRvIGJlIGFibGUgdG8gbG9hZCBhbmQgcHJlZm9ybSBxdWVyaWVzIG9uIHRoZSBkYXRhc2V0LgpgYGB7cn0KbGlicmFyeShEYXRhQ29tcHV0aW5nKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShydmVzdCkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkocmVhZHIpCmxpYnJhcnkoZ2dwbG90MikKYGBgCgoKIyMjIExvYWQgRGF0YQpHZXR0aW5nIHRoZSB0YWJsZXMgaW50byBSU3R1ZGlvIHRvIGJlIGFibGUgdG8gdXNlIHRoZW0gZm9yIGV2YWx1YXRpb24uIFdlIHdpbGwgZG8gdGhpcyB1c2luZyB0aGUgcmVhZCBmdW5jdGlvbnMuIApgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKc2FtcGxlU3VibWlzc2lvbiA8LSBkYXRhLnRhYmxlOjpmcmVhZChmaWxlX25hbWUpCnNhbXBsZVN1Ym1pc3Npb24KYGBgCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVHJhaW5EYXRhIDwtIGRhdGEudGFibGU6OmZyZWFkKGZpbGVfbmFtZSkKVHJhaW5EYXRhCmBgYAoKCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVGVzdERhdGEgPC0gZGF0YS50YWJsZTo6ZnJlYWQoZmlsZV9uYW1lKQpUZXN0RGF0YQpgYGAKCgojIyMgRGF0YSBXcmFuZ2xpbmcKCmBgYHtyfQpoYXBwaW5lc3NUYWJsZSAlPiUKICByZW5hbWUoUmV2ZW51ZUdyb3d0aCA9IGBSZXZlbnVlIEdyb3d0aGApICU+JQogIHJlbmFtZShDb3N0UmV2ZW51ZSA9IGBDb3N0IG9mIFJldmVudWVgKSAlPiUKICByZW5hbWUoR3Jvc3NQcm9maXQgPSBgR3Jvc3MgUHJvZml0YCkgJT4lCiAgcmVuYW1lKFJERXhwZW5zZXMgPSBgUiZEIEV4cGVuc2VzYCkgJT4lCiAgcmVuYW1lKFNHQUV4cGVuc2U9IGBTRyZBIEV4cGVuc2VgKSAlPiUKICByZW5hbWUoT3BFeHBlbnNlcyA9IGBPcGVyYXRpbmcgRXhwZW5zZXNgKSAlPiUKICByZW5hbWUoT3BJbmNvbWUgPSBgT3BlcmF0aW5nIEluY29tZWApICU+JQogIAogIHJlbmFtZShJbnRlcmVzdEV4cGVuc2UgPSBgSW50ZXJlc3QgRXhwZW5zZWApICU+JQogIHJlbmFtZShJbmNvbWVUYXhFeHBlbnNlID0gYEluY29tZSBUYXggRXhwZW5zZWApICU+JQogIHJlbmFtZShOZXRJbmNvbWVXT0ludGVyc3QgPSBgTmV0IEluY29tZSAtIE5vbi1Db250cm9sbGluZyBpbnRgKSAlPiUKICByZW5hbWUoTmV0SW5jb21lRGlzT3BzID0gYE5ldCBJbmNvbWUgLSBEaXNjb250aW51ZWQgb3BzYCkgJT4lCiAgcmVuYW1lKE5ldEluY29tZT0gYE5ldCBJbmNvbWVgKSAlPiUKICByZW5hbWUoUGVyZmVycmVkRGl2aWRlbmRzID0gYFByZWZlcnJlZCBEaXZpZGVuZHNgKSAlPiUKICByZW5hbWUoTmV0SW5jb21l
Q09NID0gYE5ldCBJbmNvbWUgQ29tYCkgJT4lCiAgCiAgcmVuYW1lKEVQU0RpbHV0ZWQgPSBgRVBTIERpbHV0ZWRgKSAlPiUKICByZW5hbWUoV2VpZ2h0ZWRBdlNIUyA9IGBXZWlnaHRlZCBBdmVyYWdlIFNocyBPdXRgKSAlPiUKICByZW5hbWUoV2VpZ2h0ZWRBdlNIU0RpbCA9IGBXZWlnaHRlZCBBdmVyYWdlIFNocyBPdXQgKERpbClgKSAlPiUKICByZW5hbWUoRGl2aWRlbmRQZXJTaGFyZSA9IGBEaXZpZGVuZCBwZXIgU2hhcmVgKSAlPiUKICByZW5hbWUoR3Jvc3NNYXJnaW4gPSBgR3Jvc3MgTWFyZ2luYCkgJT4lCiAgcmVuYW1lKEVCSVREQU1hcmdpbiA9IGBFQklUREEgTWFyZ2luYCkgJT4lCiAgcmVuYW1lKEVCSVRNYXJnaW4gPSBgRUJJVCBNYXJnaW5gKSAlPiUKICAKICByZW5hbWUoUHJvZml0TWFyZ2luID0gYFByb2ZpdCBNYXJnaW5gKSAlPiUKICByZW5hbWUoRnJlZUNhc2hGbG93TWFyZ2luID0gYEZyZWUgQ2FzaCBGbG93IG1hcmdpbmApICU+JQogIHJlbmFtZShDb25zb2xpZGF0ZWRJbmNvbWUgPSBgQ29uc29saWRhdGVkIEluY29tZWApICU+JQogIHJlbmFtZShFYXJuaW5nc0JlZlRheE1hcmdpbiA9IGBFYXJuaW5ncyBCZWZvcmUgVGF4IE1hcmdpbmApICU+JQogIHJlbmFtZShOZXRQcm9maXRNYXJnaW49IGBOZXQgUHJvZml0IE1hcmdpbmApICU+JQogIHJlbmFtZShDYXNoQ2FzaEVxdWl2YWxlbnRzID0gYENhc2ggYW5kIGNhc2ggZXF1aXZhbGVudHNgKSAlPiUKICByZW5hbWUoU2hvcnRUZXJtSW52ZXN0ZW1lbnRzID0gYFNob3J0LXRlcm0gaW52ZXN0bWVudHNgKSAlPiUKICAKICByZW5hbWUoKQogIAogIAogIApgYGAKCgo=